# Session setup: clear the workspace, attach the packages used by this
# analysis script, and configure warning handling.
# NOTE(review): rm(list = ls()) removes every object in the calling
# environment; kept because the script is written to start from a clean
# workspace, but it is unsafe when this file is sourced from another session.
rm(list = ls())
library(data.table)
library(tidyverse)
library(rJava)
library(RNetLogo)
library(lhs) # For maximin Latin hypercube sampling
library(ggplot2)
library(plotly) # For beautiful plotting
library(caret)
library(randomForest)
library(factoextra)
library(e1071)
library(TSrepr) # for evaluating predictive power
# library() stops immediately when the package is missing, whereas require()
# only returns FALSE and lets the script fail much later at grid.arrange().
library(gridExtra)
# NOTE(review): warn = -1 silences every warning for the rest of the session.
options(warn = -1)
# Project root and the sub-folder that holds the input CSV files.
folder.path <- "C:/Users/paslanpatir/Desktop/TEZ_v2/"
data.path <- paste0(folder.path, "data/")

# First model: NetLogo model name, its short type label and its output folder.
nl.model.1 <- "Segregation"
model.type.1 <- if (nl.model.1 == "Segregation") "basic" else "dummy"
output.folder.1 <- "outputs/outputs_50_100/outputs_WRep_Basic"
outputs.path.1 <- paste0(folder.path, output.folder.1, "/")

# Second model: always the counterpart of whatever the first model is.
nl.model.2 <- if (nl.model.1 == "Segregation") "Segregation_Dummy" else "Segregation"
model.type.2 <- if (model.type.1 == "basic") "dummy" else "basic"
output.folder.2 <- "outputs/outputs_50_100/outputs_WRep_Dummy"
outputs.path.2 <- paste0(folder.path, output.folder.2, "/")
# Instance count that appears in the unlabeled-pool file names.
unlabeled_ins <- 100

# Pool for the first model. `unlabeled_pool.name.1` first holds the file path
# and is then overwritten with the table read from that file.
unlabeled_pool.name.1 <- paste0(
  data.path, "unlabeled_pool", "_", model.type.1, "_", unlabeled_ins, ".csv"
)
unlabeled_pool.name.1 <- fread(unlabeled_pool.name.1)
data_candidates.1 <- copy(unlabeled_pool.name.1)

# Pool for the second model, handled the same way.
unlabeled_pool.name.2 <- paste0(
  data.path, "unlabeled_pool", "_", model.type.2, "_", unlabeled_ins, ".csv"
)
unlabeled_pool.name.2 <- fread(unlabeled_pool.name.2)
data_candidates.2 <- copy(unlabeled_pool.name.2)

# Stack the two plotted columns of both pools, labelled by model type.
data_candidates_bind <- rbind(
  data.table(data_candidates.1[, .(density, `%-similar-wanted`)], model = model.type.1),
  data.table(data_candidates.2[, .(density, `%-similar-wanted`)], model = model.type.2)
)

# Scatter of the candidate points, one panel per model.
data_candidates_plot <- ggplot(
  data_candidates_bind,
  aes(x = density, y = `%-similar-wanted`)
) +
  geom_point(aes(colour = model)) +
  facet_wrap(~model) +
  labs(title = "unlabeled data for models")
data_candidates_plot
#ggsave(paste0(outputs.path,"unlabeled_", model.type,".png"))
# Seeds whose test-set files are read for each model, and the instance count
# that appears in the file names.
test.seed.1 <- c(0, 1, 2)
test.seed.2 <- c(0, 2)
test_ins <- 100

# Read one CSV per seed, tag each table with its seed, and bind them in a
# single rbindlist() call instead of growing a table with rbind() in a loop.
test_set_all.1 <- rbindlist(lapply(test.seed.1, function(s) {
  csv_path <- paste0(data.path, "test_set", "_", model.type.1, "_", test_ins, "_seed", s, ".csv")
  data.table(fread(csv_path), "seed" = s)
}))

test_set_all.2 <- rbindlist(lapply(test.seed.2, function(s) {
  csv_path <- paste0(data.path, "test_set", "_", model.type.2, "_", test_ins, "_seed", s, ".csv")
  data.table(fread(csv_path), "seed" = s)
}))

# Test-set files whose names carry no seed suffix.
test_set.name.1 <- paste0(data.path, "test_set", "_", model.type.1, "_", test_ins, ".csv")
test_set.1 <- fread(test_set.name.1)
test_set.name.2 <- paste0(data.path, "test_set", "_", model.type.2, "_", test_ins, ".csv")
test_set.2 <- fread(test_set.name.2)

# Stack the plotted columns of both models, labelled by model type.
test_set_bind <- rbind(
  data.table(test_set_all.1[, .(density, `%-similar-wanted`, seed)], model = model.type.1),
  data.table(test_set_all.2[, .(density, `%-similar-wanted`, seed)], model = model.type.2)
)

# Scatter of the test points: rows are seeds, columns are models.
test_set_plot <- ggplot(data = test_set_bind, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(color = model)) +
  facet_grid(seed ~ model) +
  ggtitle(paste0("test data for models"))
test_set_plot
# Aynı seedler için basic'te de dummy'de de iki important feature'ın dağılımı aynı.
# Seeds whose one-shot training files are read for each model, and the
# instance count that appears in the file names.
seed.oneshot.1 <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 20)
seed.oneshot.2 <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 20)
train_ins_oneshot <- 100

# Read one CSV per seed, tag each table with its seed, and bind them in a
# single rbindlist() call instead of growing a table with rbind() in a loop.
training_set_all.1 <- rbindlist(lapply(seed.oneshot.1, function(s) {
  csv_path <- paste0(data.path, "training_set", "_", model.type.1, "_", train_ins_oneshot, "_seed", s, ".csv")
  data.table(fread(csv_path), "seed" = s)
}))

training_set_all.2 <- rbindlist(lapply(seed.oneshot.2, function(s) {
  csv_path <- paste0(data.path, "training_set", "_", model.type.2, "_", train_ins_oneshot, "_seed", s, ".csv")
  data.table(fread(csv_path), "seed" = s)
}))

one_shot_data.1 <- copy(training_set_all.1)
one_shot_data.2 <- copy(training_set_all.2)

# Stack the plotted columns of both models, labelled by model type.
one_shot_bind <- rbind(
  data.table(one_shot_data.1[, .(density, `%-similar-wanted`, seed, output)], model = model.type.1),
  data.table(one_shot_data.2[, .(density, `%-similar-wanted`, seed, output)], model = model.type.2)
)

# Training points coloured by output: rows are seeds, columns are models.
# labs(colour = ...) replaces labs(legend = ...): `legend` is not an aesthetic,
# so that label was never applied to anything.
one_shot_plot <- ggplot(data = one_shot_bind, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ model) +
  labs(colour = "output") +
  ggtitle(paste0("one_shot_data for models "))
one_shot_plot

# Overlaying all seeds in one panel per model makes it easy to see the
# regions that the 10 seeds together fail to cover.
ggplot(data = one_shot_bind, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(~model) +
  labs(colour = "output") +
  ggtitle(paste0("one_shot_data for models "))
# Seeds whose initial adaptive-sampling training files are read for each
# model, and the instance count that appears in the file names.
seed.Ad.1 <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 20)
seed.Ad.2 <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 20)
train_ins_Ad <- 50

# Read one CSV per seed, tag each table with its seed, and bind them in a
# single rbindlist() call instead of growing a table with rbind() in a loop.
training_set_Ad_all.1 <- rbindlist(lapply(seed.Ad.1, function(s) {
  csv_path <- paste0(data.path, "training_set", "_", model.type.1, "_", train_ins_Ad, "_seed", s, ".csv")
  data.table(fread(csv_path), seed = s)
}))

training_set_Ad_all.2 <- rbindlist(lapply(seed.Ad.2, function(s) {
  csv_path <- paste0(data.path, "training_set", "_", model.type.2, "_", train_ins_Ad, "_seed", s, ".csv")
  data.table(fread(csv_path), seed = s)
}))

adaptive_initial_data.1 <- copy(training_set_Ad_all.1)
adaptive_initial_data.2 <- copy(training_set_Ad_all.2)

# Stack the plotted columns of both models, labelled by model type.
adaptive_initial_bind <- rbind(
  data.table(adaptive_initial_data.1[, .(density, `%-similar-wanted`, seed, output)], model = model.type.1),
  data.table(adaptive_initial_data.2[, .(density, `%-similar-wanted`, seed, output)], model = model.type.2)
)

# Initial points coloured by output: rows are seeds, columns are models.
# labs(colour = ...) replaces labs(legend = ...): `legend` is not an aesthetic,
# so that label was never applied to anything.
adaptive_initial_plot <- ggplot(data = adaptive_initial_bind, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ model) +
  labs(colour = "output") +
  ggtitle(paste0("initial_adaptive_data for models "))
adaptive_initial_plot

# Overlaying all seeds in one panel per model makes it easy to see the
# regions that the 10 seeds together fail to cover.
ggplot(data = adaptive_initial_bind, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(~model) +
  labs(colour = "output") +
  ggtitle(paste0("overlapped initial adaptive data for models"))
#### OneShot ####
#### Model.1 ####
# Read the one-shot result tables written for the first model.
one_shot_path.1 <- paste0(outputs.path.1, "oneshot/")
obb_error_oneshot.1 <- fread(
  paste0(one_shot_path.1, model.type.1, "_obb_error_oneshot_all", ".csv")
)
performance_table_oneshot.1 <- fread(
  paste0(one_shot_path.1, model.type.1, "_performance_table_oneshot", ".csv")
)
predictedLabels_oneshot.1 <- fread(
  paste0(one_shot_path.1, model.type.1, "_predictedLabels_oneshot_all", ".csv")
)

# Long format: one row per (iter, seed, rep, error type).
performance_molten_oneshot.1 <- melt(
  performance_table_oneshot.1,
  id.vars = c("iter", "seed", "rep")
)
setnames(
  performance_molten_oneshot.1,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### oob error ####
# One point per replication, one panel per seed.
oob_oneshot_plot.1 <- ggplot(obb_error_oneshot.1, aes(x = as.factor(rep), y = obb_error)) +
  geom_point(aes(colour = as.factor(rep))) +
  #geom_hline( aes(yintercept = obb_error, color = obb_error )) +
  # facet_grid(rep ~ seed) +
  facet_wrap(~seed) +
  labs(title = paste0("oob error with oneshot sampling for ", model.type.1))
oob_oneshot_plot.1

#### test error ####
# The green horizontal line marks the mean rmse over all seeds and replications.
one_shot_ind_p.1 <- ggplot(
  performance_molten_oneshot.1,
  aes(x = as.factor(rep), y = errorvalue, group = errortype, colour = errortype)
) +
  geom_point() +
  facet_wrap(~seed) +
  geom_hline(
    yintercept = performance_molten_oneshot.1[errortype == "rmse", mean(errorvalue)],
    color = "green"
  ) +
  labs(title = paste0("test error with Oneshot Sampling for ", model.type.1))
one_shot_ind_p.1

# Base-graphics boxplot of rmse by seed; the returned statistics are printed.
bxp.oneshot.1 <- boxplot(
  errorvalue ~ seed,
  data = performance_molten_oneshot.1[errortype == "rmse"],
  col = "green"
)
bxp.oneshot.1

# OOB error and test error side by side.
grid.arrange(oob_oneshot_plot.1, one_shot_ind_p.1, ncol = 2)
# Seed 20: train datasını iyi öğrenmiş ama test datasında zayıf kalmış; Seed 8: 20'nin tam tersi; Seed 6: trainde dalgalanma geniş ama test için iyi denebilir.
#### Actual vs Fitted ####
# Actual vs predicted output for one selected seed, one panel per replication.
# geom_abline() with no arguments draws the identity line y = x.
slct_seed <- 6
a_vs_f_oneshot.1 <- ggplot(
  predictedLabels_oneshot.1[seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~rep) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for OneShot Sampling", "\n", " seed:", slct_seed, "\n", "for ", model.type.1))
a_vs_f_oneshot.1

#### Replications on Each Seed ####
# One boxplot per error type, one free-scaled panel per seed.
one_shot_ind_bxp.1 <- ggplot(
  data = performance_molten_oneshot.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~seed, scales = "free") +
  ggtitle(paste0(" test performances", "\n", "with Oneshot Sampling for model_", model.type.1))
one_shot_ind_bxp.1

# Mean rmse per seed, best seed first.
performance_table_oneshot.1[, .(mean_rmse = mean(rmse)), .(seed)][order(mean_rmse)]

# rmse per replication with a boxplot layer, one panel per seed.
# The colour legend encodes the replication, so it is titled "rep". The
# original labs(legend = "rmse") named no aesthetic and was silently ignored.
one_shot_ind_pbxp.1 <- ggplot(data = performance_table_oneshot.1, aes(y = rmse, x = rep)) +
  geom_point(aes(colour = as.factor(rep))) +
  geom_boxplot() +
  facet_wrap(~seed) +
  labs(colour = "rep")
one_shot_ind_pbxp.1

# Overall boxplot per error type; the caption reports the mean rmse.
one_shot_bxp.1 <- ggplot(
  data = performance_molten_oneshot.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_oneshot.1$rmse))) +
  ggtitle(paste0("overall test performace", "\n", " with Oneshot Sampling for ", model.type.1))
one_shot_bxp.1

# Five-number summary and outliers of rmse.
boxplot.stats(performance_table_oneshot.1$rmse)
#### Model.2 ####
# Read the one-shot result tables written for the second model.
one_shot_path.2 <- paste0(outputs.path.2, "oneshot/")
obb_error_oneshot.2 <- fread(paste0(one_shot_path.2, model.type.2, "_obb_error_oneshot_all", ".csv"))
performance_table_oneshot.2 <- fread(paste0(one_shot_path.2, model.type.2, "_performance_table_oneshot", ".csv"))
predictedLabels_oneshot.2 <- fread(paste0(one_shot_path.2, model.type.2, "_predictedLabels_oneshot_all", ".csv"))

# Long format: one row per (iter, seed, rep, error type).
performance_molten_oneshot.2 <- melt(
  data = performance_table_oneshot.2,
  id.vars = c("iter", "seed", "rep")
)
setnames(performance_molten_oneshot.2, c("variable", "value"), c("errortype", "errorvalue"))

#### oob error ####
# One point per replication, one panel per seed.
oob_oneshot_plot.2 <- ggplot(obb_error_oneshot.2, aes(x = as.factor(rep), y = obb_error)) +
  geom_point(aes(colour = as.factor(rep))) +
  #geom_hline( aes(yintercept = obb_error, color = obb_error )) +
  # facet_grid(rep ~ seed) +
  facet_wrap(~seed) +
  ggtitle(paste0("oob error with oneshot sampling for ", model.type.2))
oob_oneshot_plot.2

#### test error ####
# The green horizontal line marks the mean rmse over all seeds and replications.
one_shot_ind_p.2 <- ggplot(
  performance_molten_oneshot.2,
  aes(x = as.factor(rep), y = errorvalue, group = errortype, col = errortype)
) +
  geom_point() +
  facet_wrap(~seed) +
  geom_hline(
    yintercept = mean(performance_molten_oneshot.2[errortype == "rmse"]$errorvalue),
    color = "green"
  ) +
  ggtitle(paste0("test error with Oneshot Sampling for ", model.type.2))
one_shot_ind_p.2

# Base-graphics boxplot of rmse by seed; the returned statistics are printed.
bxp.oneshot.2 <- boxplot(
  errorvalue ~ seed,
  data = performance_molten_oneshot.2[errortype == "rmse"],
  col = "green"
)
bxp.oneshot.2

# OOB error and test error side by side.
grid.arrange(oob_oneshot_plot.2, one_shot_ind_p.2, ncol = 2)

#### Actual vs Fitted ####
# Actual vs predicted output for one selected seed, one panel per replication.
# geom_abline() with no arguments draws the identity line y = x.
slct_seed <- 2
a_vs_f_oneshot.2 <- ggplot(
  predictedLabels_oneshot.2[seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~rep) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for OneShot Sampling", "\n", " seed:", slct_seed, "\n", "for ", model.type.2))
a_vs_f_oneshot.2

#### Replications on Each Seed ####
# One boxplot per error type, one free-scaled panel per seed.
one_shot_ind_bxp.2 <- ggplot(
  data = performance_molten_oneshot.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~seed, scales = "free") +
  ggtitle(paste0(" test performances", "\n", "with Oneshot Sampling for model_", model.type.2))
one_shot_ind_bxp.2

# Mean rmse per seed, best seed first.
performance_table_oneshot.2[, .(mean_rmse = mean(rmse)), .(seed)][order(mean_rmse)]

# rmse per replication with a boxplot layer, one panel per seed.
# The colour legend encodes the replication, so it is titled "rep". The
# original labs(legend = "rmse") named no aesthetic and was silently ignored.
one_shot_ind_pbxp.2 <- ggplot(data = performance_table_oneshot.2, aes(y = rmse, x = rep)) +
  geom_point(aes(colour = as.factor(rep))) +
  geom_boxplot() +
  facet_wrap(~seed) +
  labs(colour = "rep")
one_shot_ind_pbxp.2

# Overall boxplot per error type; the caption reports the mean rmse.
one_shot_bxp.2 <- ggplot(
  data = performance_molten_oneshot.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_oneshot.2$rmse))) +
  ggtitle(paste0("overall test performace", "\n", " with Oneshot Sampling for ", model.type.2))
one_shot_bxp.2

# Five-number summary and outliers of rmse.
boxplot.stats(performance_table_oneshot.2$rmse)

# Per-seed mean rmse of both models side by side; each side is sorted by its
# own mean_rmse, so rows do not necessarily refer to the same seed.
cbind(
  performance_table_oneshot.1[, .(mean_rmse = mean(rmse)), .(seed)][order(mean_rmse)],
  performance_table_oneshot.2[, .(mean_rmse = mean(rmse)), .(seed)][order(mean_rmse)]
)
#### Random Sampling Replications ####
#### Model.1 ####
# Read the Rd-sampling result tables written for the first model.
Rd_path.1 <- paste0(outputs.path.1, "Rd/")
obb_error_Rd.1 <- fread(paste0(Rd_path.1, model.type.1, "_obb_error_Rd", ".csv"))
performance_table_Rd.1 <- fread(paste0(Rd_path.1, model.type.1, "_performance_table_Rd", ".csv"))
predictedLabels_Rd.1 <- fread(paste0(Rd_path.1, model.type.1, "_predictedLabels_table_Rd", ".csv"))
FinalTrainData_Rd.1 <- fread(paste0(Rd_path.1, model.type.1, "_FinalTrainData_Rd", ".csv"))
train_candidates_Rd.1 <- fread(paste0(Rd_path.1, model.type.1, "_train_candidates_table_Rd", ".csv"))

# Long format: one row per (iter, seed, rep, error type).
performance_molten_Rd_seq.1 <- melt(
  data = performance_table_Rd.1,
  id.vars = c("iter", "seed", "rep")
)
setnames(performance_molten_Rd_seq.1, c("variable", "value"), c("errortype", "errorvalue"))

# Long format of the predictions: columns pred_output_1 .. pred_output_11
# become rows keyed by iteration.
predictedLabels_molten_Rd_seq.1 <- melt(
  data = predictedLabels_Rd.1,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = paste0("pred_output_", 1:11)
)
setnames(predictedLabels_molten_Rd_seq.1, c("variable", "value"), c("iter", "pred_output"))
# Characters from position 13 onward are what follows the 12-character
# "pred_output_" prefix, i.e. the iteration number.
predictedLabels_molten_Rd_seq.1[, iter := as.numeric(str_sub(iter, 13))]

#### oob error ####
oob_Rd_seq_plot.1 <- ggplot(obb_error_Rd.1, aes(x = iter, y = obb_error)) +
  geom_line(aes(color = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Rd Sampling for ", model.type.1))
oob_Rd_seq_plot.1

#### test error ####
test_error_Rd_seq_plot.1 <- ggplot(
  performance_molten_Rd_seq.1,
  aes(x = iter, y = errorvalue, group = errortype, col = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Rd Sampling for ", model.type.1))
test_error_Rd_seq_plot.1

#### Actual vs Fitted ####
# Selected replication and seed; one panel per iteration.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Rd_seq.1 <- ggplot(
  predictedLabels_molten_Rd_seq.1[rep == slct_rep & seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for Rd Sampling", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.1))
a_vs_f_Rd_seq.1

#### Replications on Each Iteration ####
# Observe the boxplots in each iteration.
Rd_seq_ind_bxp.1 <- ggplot(
  performance_molten_Rd_seq.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Rd Sampling for model_", model.type.1))
Rd_seq_ind_bxp.1

Rd_seq_ind_bxp_seed.1 <- ggplot(
  performance_molten_Rd_seq.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Rd Sampling for ", model.type.1))
Rd_seq_ind_bxp_seed.1

#### Overall BoxPlot ####
# Only rows with iter == 11 are used here.
Rd_seq_bxp.1 <- ggplot(
  data = performance_molten_Rd_seq.1[iter == 11],
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Rd.1[iter == 11]$rmse))) +
  ggtitle(paste0("final iteration test performace", "\n", " with Rd Sampling for ", model.type.1))
Rd_seq_bxp.1
boxplot.stats(performance_table_Rd.1[iter == 11]$rmse)

#### Train Candidates ####
# Grey points are the initial adaptive data; coloured points are the
# candidates of replication 10, coloured by iteration. The legend is titled
# "iter" because labs(legend = "output") named no aesthetic and was ignored.
tc_Rd_seq_plot.1 <- ggplot(adaptive_initial_data.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(color = "grey") +
  geom_point(data = train_candidates_Rd.1[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Rd Sampling for ", model.type.1))
tc_Rd_seq_plot.1

#### Final Data ####
# Final training data coloured by output: rows are seeds, columns are reps.
final_train_data_Rd.1 <- ggplot(data = FinalTrainData_Rd.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  #facet_warp(~rep) +
  #facet_warp(~seed) +
  labs(colour = "output") +
  ggtitle(paste0("final Rd data for model_ ", model.type.1))
final_train_data_Rd.1
#### Model.2 ####
# Read the Rd-sampling result tables written for the second model.
Rd_path.2 <- paste0(outputs.path.2, "Rd/")
obb_error_Rd.2 <- fread(paste0(Rd_path.2, model.type.2, "_obb_error_Rd", ".csv"))
performance_table_Rd.2 <- fread(paste0(Rd_path.2, model.type.2, "_performance_table_Rd", ".csv"))
predictedLabels_Rd.2 <- fread(paste0(Rd_path.2, model.type.2, "_predictedLabels_table_Rd", ".csv"))
FinalTrainData_Rd.2 <- fread(paste0(Rd_path.2, model.type.2, "_FinalTrainData_Rd", ".csv"))
train_candidates_Rd.2 <- fread(paste0(Rd_path.2, model.type.2, "_train_candidates_table_Rd", ".csv"))

# Long format: one row per (iter, seed, rep, error type).
performance_molten_Rd_seq.2 <- melt(
  data = performance_table_Rd.2,
  id.vars = c("iter", "seed", "rep")
)
setnames(performance_molten_Rd_seq.2, c("variable", "value"), c("errortype", "errorvalue"))

# Long format of the predictions: columns pred_output_1 .. pred_output_11
# become rows keyed by iteration.
predictedLabels_molten_Rd_seq.2 <- melt(
  data = predictedLabels_Rd.2,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = paste0("pred_output_", 1:11)
)
setnames(predictedLabels_molten_Rd_seq.2, c("variable", "value"), c("iter", "pred_output"))
# Characters from position 13 onward are what follows the 12-character
# "pred_output_" prefix, i.e. the iteration number.
predictedLabels_molten_Rd_seq.2[, iter := as.numeric(str_sub(iter, 13))]

#### oob error ####
oob_Rd_seq_plot.2 <- ggplot(obb_error_Rd.2, aes(x = iter, y = obb_error)) +
  geom_line(aes(color = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Rd Sampling for ", model.type.2))
oob_Rd_seq_plot.2

#### test error ####
test_error_Rd_seq_plot.2 <- ggplot(
  performance_molten_Rd_seq.2,
  aes(x = iter, y = errorvalue, group = errortype, col = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Rd Sampling for ", model.type.2))
test_error_Rd_seq_plot.2

#### Actual vs Fitted ####
# Selected replication and seed; one panel per iteration.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Rd_seq.2 <- ggplot(
  predictedLabels_molten_Rd_seq.2[rep == slct_rep & seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for Rd Sampling", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.2))
a_vs_f_Rd_seq.2

#### Replications on Each Iteration ####
# Observe the boxplots in each iteration.
Rd_seq_ind_bxp.2 <- ggplot(
  performance_molten_Rd_seq.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Rd Sampling for model_", model.type.2))
Rd_seq_ind_bxp.2

Rd_seq_ind_bxp_seed.2 <- ggplot(
  performance_molten_Rd_seq.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Rd Sampling for ", model.type.2))
Rd_seq_ind_bxp_seed.2

#### Overall BoxPlot ####
# Only rows with iter == 11 are used here.
Rd_seq_bxp.2 <- ggplot(
  data = performance_molten_Rd_seq.2[iter == 11],
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Rd.2[iter == 11]$rmse))) +
  ggtitle(paste0("final iteration test performace", "\n", " with Rd Sampling for ", model.type.2))
Rd_seq_bxp.2
boxplot.stats(performance_table_Rd.2[iter == 11]$rmse)

#### Train Candidates ####
# Grey points are the initial adaptive data; coloured points are the
# candidates of replication 10, coloured by iteration. The legend is titled
# "iter" because labs(legend = "output") named no aesthetic and was ignored.
tc_Rd_seq_plot.2 <- ggplot(adaptive_initial_data.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(color = "grey") +
  geom_point(data = train_candidates_Rd.2[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Rd Sampling for ", model.type.2))
tc_Rd_seq_plot.2

#### Final Data ####
# Final training data coloured by output: rows are seeds, columns are reps.
final_train_data_Rd.2 <- ggplot(data = FinalTrainData_Rd.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  #facet_warp(~rep) +
  #facet_warp(~seed) +
  labs(colour = "output") +
  ggtitle(paste0("final Rd data for model_ ", model.type.2))
final_train_data_Rd.2

# Stack the Rd plots of the two models for comparison.
grid.arrange(Rd_seq_ind_bxp.1, Rd_seq_ind_bxp.2)
grid.arrange(Rd_seq_bxp.1, Rd_seq_bxp.2)
#### Adaptive Sampling ####
#### Model.1 ####
# Read the adaptive-sampling (Ad_sd) result tables written for the first model.
Ad_path.1 <- paste0(outputs.path.1, "Ad_sd/")
obb_error_Ad.1 <- fread(paste0(Ad_path.1, model.type.1, "_obb_error_Ad", ".csv"))
performance_table_Ad.1 <- fread(paste0(Ad_path.1, model.type.1, "_performance_table_Ad", ".csv"))
predictedLabels_Ad.1 <- fread(paste0(Ad_path.1, model.type.1, "_predictedLabels_table_Ad", ".csv"))
FinalTrainData_Ad.1 <- fread(paste0(Ad_path.1, model.type.1, "_FinalTrainData_Ad", ".csv"))
train_candidates_Ad.1 <- fread(paste0(Ad_path.1, model.type.1, "_train_candidates_table_Ad", ".csv"))

# Long format: one row per (iter, seed, rep, error type).
performance_molten_Ad_seq.1 <- melt(
  data = performance_table_Ad.1,
  id.vars = c("iter", "seed", "rep")
)
setnames(performance_molten_Ad_seq.1, c("variable", "value"), c("errortype", "errorvalue"))

# Long format of the predictions: columns pred_output_1 .. pred_output_11
# become rows keyed by iteration.
predictedLabels_molten_Ad_seq.1 <- melt(
  data = predictedLabels_Ad.1,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = paste0("pred_output_", 1:11)
)
setnames(predictedLabels_molten_Ad_seq.1, c("variable", "value"), c("iter", "pred_output"))
# Characters from position 13 onward are what follows the 12-character
# "pred_output_" prefix, i.e. the iteration number.
predictedLabels_molten_Ad_seq.1[, iter := as.numeric(str_sub(iter, 13))]

#### oob error ####
oob_Ad_seq_plot.1 <- ggplot(obb_error_Ad.1, aes(x = iter, y = obb_error)) +
  geom_line(aes(color = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Ad Sampling for ", model.type.1))
oob_Ad_seq_plot.1

#### test error ####
test_error_Ad_seq_plot.1 <- ggplot(
  performance_molten_Ad_seq.1,
  aes(x = iter, y = errorvalue, group = errortype, col = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Ad Sampling for ", model.type.1))
test_error_Ad_seq_plot.1

#### Actual vs Fitted ####
# Selected replication and seed; one panel per iteration.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Ad_seq.1 <- ggplot(
  predictedLabels_molten_Ad_seq.1[rep == slct_rep & seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for Ad Sampling ", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.1))
a_vs_f_Ad_seq.1

#### Replications on Each Iteration ####
# Observe the boxplots in each iteration.
Ad_seq_ind_bxp.1 <- ggplot(
  performance_molten_Ad_seq.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad Sampling for model_", model.type.1))
Ad_seq_ind_bxp.1

Ad_seq_ind_bxp_seed.1 <- ggplot(
  performance_molten_Ad_seq.1,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Ad Sampling for ", model.type.1))
Ad_seq_ind_bxp_seed.1

#### Overall BoxPlot ####
# Only rows with iter == 11 are used here.
Ad_seq_bxp.1 <- ggplot(
  data = performance_molten_Ad_seq.1[iter == 11],
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Ad.1[iter == 11]$rmse))) +
  ggtitle(paste0("final iteration test performace", "\n", " with Ad Sampling for ", model.type.1))
Ad_seq_bxp.1
boxplot.stats(performance_table_Ad.1[iter == 11]$rmse)

#### Train Candidates ####
# Grey points are the initial adaptive data; coloured points are the
# candidates of replication 10, coloured by iteration.
# This Model.1 plot was previously stored as `tc_Ad_seq_plot.2` (a copy-paste
# slip); it now uses the `.1` suffix like every other Model.1 object.
# The legend is titled "iter" because labs(legend = "output") named no
# aesthetic and was ignored.
tc_Ad_seq_plot.1 <- ggplot(adaptive_initial_data.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(color = "grey") +
  geom_point(data = train_candidates_Ad.1[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Ad Sampling for ", model.type.1))
tc_Ad_seq_plot.1

#### Final Data ####
# Final training data coloured by output: rows are seeds, columns are reps.
final_train_data_Ad.1 <- ggplot(data = FinalTrainData_Ad.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  #facet_warp(~rep) +
  #facet_warp(~seed) +
  labs(colour = "output") +
  ggtitle(paste0("final Ad data for model_ ", model.type.1))
final_train_data_Ad.1
#### Model.2 ####
# Read the adaptive-sampling (Ad_sd) result tables written for the second
# model. (The section header previously said "Model.1".)
Ad_path.2 <- paste0(outputs.path.2, "Ad_sd/")
obb_error_Ad.2 <- fread(paste0(Ad_path.2, model.type.2, "_obb_error_Ad", ".csv"))
performance_table_Ad.2 <- fread(paste0(Ad_path.2, model.type.2, "_performance_table_Ad", ".csv"))
predictedLabels_Ad.2 <- fread(paste0(Ad_path.2, model.type.2, "_predictedLabels_table_Ad", ".csv"))
FinalTrainData_Ad.2 <- fread(paste0(Ad_path.2, model.type.2, "_FinalTrainData_Ad", ".csv"))
train_candidates_Ad.2 <- fread(paste0(Ad_path.2, model.type.2, "_train_candidates_table_Ad", ".csv"))

# Long format: one row per (iter, seed, rep, error type).
performance_molten_Ad_seq.2 <- melt(
  data = performance_table_Ad.2,
  id.vars = c("iter", "seed", "rep")
)
setnames(performance_molten_Ad_seq.2, c("variable", "value"), c("errortype", "errorvalue"))

# Long format of the predictions: columns pred_output_1 .. pred_output_11
# become rows keyed by iteration.
predictedLabels_molten_Ad_seq.2 <- melt(
  data = predictedLabels_Ad.2,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = paste0("pred_output_", 1:11)
)
setnames(predictedLabels_molten_Ad_seq.2, c("variable", "value"), c("iter", "pred_output"))
# Characters from position 13 onward are what follows the 12-character
# "pred_output_" prefix, i.e. the iteration number.
predictedLabels_molten_Ad_seq.2[, iter := as.numeric(str_sub(iter, 13))]

#### oob error ####
oob_Ad_seq_plot.2 <- ggplot(obb_error_Ad.2, aes(x = iter, y = obb_error)) +
  geom_line(aes(color = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Ad Sampling for ", model.type.2))
oob_Ad_seq_plot.2

#### test error ####
test_error_Ad_seq_plot.2 <- ggplot(
  performance_molten_Ad_seq.2,
  aes(x = iter, y = errorvalue, group = errortype, col = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Ad Sampling for ", model.type.2))
test_error_Ad_seq_plot.2

#### Actual vs Fitted ####
# Selected replication and seed; one panel per iteration.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Ad_seq.2 <- ggplot(
  predictedLabels_molten_Ad_seq.2[rep == slct_rep & seed == slct_seed],
  aes(x = output, y = pred_output, color = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  xlab("actual values") +
  ylab("fitted values") +
  ggtitle(paste0("Actual vs Fitted for Ad Sampling", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.2))
a_vs_f_Ad_seq.2

#### Replications on Each Iteration ####
# Observe the boxplots in each iteration.
Ad_seq_ind_bxp.2 <- ggplot(
  performance_molten_Ad_seq.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad Sampling for model_", model.type.2))
Ad_seq_ind_bxp.2

Ad_seq_ind_bxp_seed.2 <- ggplot(
  performance_molten_Ad_seq.2,
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Ad Sampling for ", model.type.2))
Ad_seq_ind_bxp_seed.2

#### Overall BoxPlot ####
# Only rows with iter == 11 are used here.
Ad_seq_bxp.2 <- ggplot(
  data = performance_molten_Ad_seq.2[iter == 11],
  aes(y = errorvalue, group = errortype, col = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Ad.2[iter == 11]$rmse))) +
  ggtitle(paste0("final iteration test performace", "\n", " with Ad Sampling for ", model.type.2))
Ad_seq_bxp.2
boxplot.stats(performance_table_Ad.2[iter == 11]$rmse)

#### Train Candidates ####
# Grey points are the initial adaptive data; coloured points are the
# candidates of replication 10, coloured by iteration. The legend is titled
# "iter" because labs(legend = "output") named no aesthetic and was ignored.
tc_Ad_seq_plot.2 <- ggplot(adaptive_initial_data.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(color = "grey") +
  geom_point(data = train_candidates_Ad.2[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Ad Sampling for ", model.type.2))
tc_Ad_seq_plot.2

#### Final Data ####
# Final training data coloured by output: rows are seeds, columns are reps.
final_train_data_Ad.2 <- ggplot(data = FinalTrainData_Ad.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  #facet_warp(~rep) +
  #facet_warp(~seed) +
  labs(colour = "output") +
  ggtitle(paste0("final Ad data for model_ ", model.type.2))
final_train_data_Ad.2

# Stack the Ad plots of the two models for comparison.
grid.arrange(Ad_seq_ind_bxp.1, Ad_seq_ind_bxp.2)
grid.arrange(Ad_seq_bxp.1, Ad_seq_bxp.2)
#### all BoxPlot ####
# Model 1: iteration-11 errors of Ad and Rd sampling next to the one-shot
# errors. The numeric prefix of `sampling` fixes the panel order.
all_bxp.1 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_seq.1[iter == 11], sampling = "3Ad"),
    data.table(performance_molten_Rd_seq.1[iter == 11], sampling = "2Rd"),
    data.table(performance_molten_oneshot.1, sampling = "1oneshot")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  facet_wrap(~sampling) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  ggtitle(paste0("final iteration test performace", "\n", " with 3 samplings for ", model.type.1))
all_bxp.1
#### all BoxPlot ####
# Model 2: iteration-11 errors of Ad and Rd sampling next to the one-shot
# errors. The numeric prefix of `sampling` fixes the panel order.
all_bxp.2 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_seq.2[iter == 11], sampling = "3Ad"),
    data.table(performance_molten_Rd_seq.2[iter == 11], sampling = "2Rd"),
    data.table(performance_molten_oneshot.2, sampling = "1oneshot")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  facet_wrap(~sampling) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  ggtitle(paste0("final iteration test performace", "\n", " with 3 Samplings for ", model.type.2))
all_bxp.2
#### Adaptive Sampling With Range ####
#### Model.1 ####
# Read the saved Ad_range outputs of model 1 from <outputs.path.1>/Ad_range/.
Ad_range_path.1 <- paste0(outputs.path.1, "Ad_range/")
obb_error_Ad_range.1 <- fread(paste0(Ad_range_path.1, model.type.1, "_obb_error_Ad", ".csv"))
performance_table_Ad_range.1 <- fread(paste0(Ad_range_path.1, model.type.1, "_performance_table_Ad", ".csv"))
predictedLabels_Ad_range.1 <- fread(paste0(Ad_range_path.1, model.type.1, "_predictedLabels_table_Ad", ".csv"))
FinalTrainData_Ad_range.1 <- fread(paste0(Ad_range_path.1, model.type.1, "_FinalTrainData_Ad", ".csv"))
train_candidates_Ad_range.1 <- fread(paste0(Ad_range_path.1, model.type.1, "_train_candidates_table_Ad", ".csv"))

# Long format: one row per (iter, seed, rep, error metric).
performance_molten_Ad_range_seq.1 <- melt(
  data = performance_table_Ad_range.1,
  id.vars = c("iter", "seed", "rep")
)
setnames(
  performance_molten_Ad_range_seq.1,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

# Long format of the predictions: the 11 pred_output_<i> columns become rows.
predictedLabels_molten_Ad_range_seq.1 <- melt(
  data = predictedLabels_Ad_range.1,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = c(
    "pred_output_1", "pred_output_2", "pred_output_3", "pred_output_4",
    "pred_output_5", "pred_output_6", "pred_output_7", "pred_output_8",
    "pred_output_9", "pred_output_10", "pred_output_11"
  )
)
setnames(
  predictedLabels_molten_Ad_range_seq.1,
  old = c("variable", "value"),
  new = c("iter", "pred_output")
)
# Characters from position 13 onwards are what follows "pred_output_",
# i.e. the iteration number.
predictedLabels_molten_Ad_range_seq.1[, iter := as.numeric(substring(as.character(iter), 13))]
#### oob error ####
# OOB error against iteration for model 1, one panel per (rep, seed) pair.
oob_Ad_range_seq_plot.1 <- ggplot(
  data = obb_error_Ad_range.1,
  mapping = aes(x = iter, y = obb_error)
) +
  geom_line(mapping = aes(colour = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Ad_range Sampling for ", model.type.1))
oob_Ad_range_seq_plot.1

#### test error ####
# Test-set error metrics against iteration on the same rep-by-seed grid.
test_error_Ad_range_seq_plot.1 <- ggplot(
  data = performance_molten_Ad_range_seq.1,
  mapping = aes(x = iter, y = errorvalue, group = errortype, colour = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Ad_range Sampling for ", model.type.1))
test_error_Ad_range_seq_plot.1
#### Actual vs Fitted ####
# Actual against predicted output for one selected replication and seed,
# one panel per iteration; points are coloured by the prediction error.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Ad_range_seq.1 <- ggplot(
  data = predictedLabels_molten_Ad_range_seq.1[rep == slct_rep & seed == slct_seed],
  mapping = aes(x = output, y = pred_output, colour = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  labs(x = "actual values", y = "fitted values") +
  ggtitle(paste0("Actual vs Fitted for Ad_range Sampling ", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.1))
a_vs_f_Ad_range_seq.1
#### Replications on Each Iteration ####
# Error metrics of the Ad_range runs for model 1 as boxplots, one panel per
# iteration.
Ad_range_seq_ind_bxp.1 <- ggplot(
  data = performance_molten_Ad_range_seq.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # Disabled one-shot reference line:
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad_range Sampling for model_", model.type.1))
Ad_range_seq_ind_bxp.1

# Same metrics laid out on a grid: seeds as rows, iterations as columns.
Ad_range_seq_ind_bxp_seed.1 <- ggplot(
  data = performance_molten_Ad_range_seq.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Ad_range Sampling for ", model.type.1))
Ad_range_seq_ind_bxp_seed.1
#### Overall BoxPlot ####
# Boxplot restricted to iteration 11; the caption shows the mean RMSE of the
# same iteration taken from the wide performance table.
Ad_range_seq_bxp.1 <- ggplot(
  data = performance_molten_Ad_range_seq.1[iter == 11],
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Ad_range.1[iter == 11][["rmse"]]))) +
  ggtitle(paste0("final iteration test performace", "\n", " with Ad_range Sampling for ", model.type.1))
Ad_range_seq_bxp.1
# Five-number summary and outliers of the iteration-11 RMSE values.
boxplot.stats(performance_table_Ad_range.1[iter == 11][["rmse"]])
#### Train Candidates ####
# Grey points: the initial adaptive data of model 1. Coloured points: the
# Ad_range train candidates of replication 10, coloured by iteration, one
# panel per seed.
# Stored under the `.1` suffix: this is the model 1 plot (it was previously
# saved as `.2` and then overwritten by the model 2 section).
tc_Ad_range_seq_plot.1 <- ggplot(adaptive_initial_data.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(colour = "grey") +
  geom_point(data = train_candidates_Ad_range.1[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  # `legend` is not an aesthetic, so labs(legend = ...) never labelled anything;
  # the colour scale is the one that needs a title, and it encodes the iteration.
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Ad_range Sampling for ", model.type.1))
tc_Ad_range_seq_plot.1
#### Final Data ####
# Final Ad_range training points of model 1, coloured by the simulation
# output, on a seed (rows) by replication (columns) grid.
final_train_data_Ad_range.1 <- ggplot(data = FinalTrainData_Ad_range.1, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  # Alternative layouts: facet_wrap(~rep) or facet_wrap(~seed).
  # `legend` is not an aesthetic; label the colour scale explicitly instead.
  labs(colour = "output") +
  ggtitle(paste0("final Ad_range data for model_ ", model.type.1))
final_train_data_Ad_range.1
#### Model.2 ####
# Read the saved Ad_range outputs of model 2 from <outputs.path.2>/Ad_range/.
Ad_range_path.2 <- paste0(outputs.path.2, "Ad_range/")
obb_error_Ad_range.2 <- fread(paste0(Ad_range_path.2, model.type.2, "_obb_error_Ad", ".csv"))
performance_table_Ad_range.2 <- fread(paste0(Ad_range_path.2, model.type.2, "_performance_table_Ad", ".csv"))
predictedLabels_Ad_range.2 <- fread(paste0(Ad_range_path.2, model.type.2, "_predictedLabels_table_Ad", ".csv"))
FinalTrainData_Ad_range.2 <- fread(paste0(Ad_range_path.2, model.type.2, "_FinalTrainData_Ad", ".csv"))
train_candidates_Ad_range.2 <- fread(paste0(Ad_range_path.2, model.type.2, "_train_candidates_table_Ad", ".csv"))

# Long format: one row per (iter, seed, rep, error metric).
performance_molten_Ad_range_seq.2 <- melt(
  data = performance_table_Ad_range.2,
  id.vars = c("iter", "seed", "rep")
)
setnames(
  performance_molten_Ad_range_seq.2,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

# Long format of the predictions: the 11 pred_output_<i> columns become rows.
predictedLabels_molten_Ad_range_seq.2 <- melt(
  data = predictedLabels_Ad_range.2,
  id.vars = c("density", "%-similar-wanted", "output", "seed", "rep"),
  measure.vars = c(
    "pred_output_1", "pred_output_2", "pred_output_3", "pred_output_4",
    "pred_output_5", "pred_output_6", "pred_output_7", "pred_output_8",
    "pred_output_9", "pred_output_10", "pred_output_11"
  )
)
setnames(
  predictedLabels_molten_Ad_range_seq.2,
  old = c("variable", "value"),
  new = c("iter", "pred_output")
)
# Characters from position 13 onwards are what follows "pred_output_",
# i.e. the iteration number.
predictedLabels_molten_Ad_range_seq.2[, iter := as.numeric(substring(as.character(iter), 13))]
#### oob error ####
# OOB error against iteration for model 2, one panel per (rep, seed) pair.
oob_Ad_range_seq_plot.2 <- ggplot(
  data = obb_error_Ad_range.2,
  mapping = aes(x = iter, y = obb_error)
) +
  geom_line(mapping = aes(colour = as.factor(rep))) +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential oob error with Ad_range Sampling for ", model.type.2))
oob_Ad_range_seq_plot.2

#### test error ####
# Test-set error metrics against iteration on the same rep-by-seed grid.
test_error_Ad_range_seq_plot.2 <- ggplot(
  data = performance_molten_Ad_range_seq.2,
  mapping = aes(x = iter, y = errorvalue, group = errortype, colour = errortype)
) +
  geom_line() +
  facet_grid(rep ~ seed) +
  ggtitle(paste0("sequential test error with Ad_range Sampling for ", model.type.2))
test_error_Ad_range_seq_plot.2
#### Actual vs Fitted ####
# Actual against predicted output for one selected replication and seed,
# one panel per iteration; points are coloured by the prediction error.
slct_rep <- 10
slct_seed <- 2
a_vs_f_Ad_range_seq.2 <- ggplot(
  data = predictedLabels_molten_Ad_range_seq.2[rep == slct_rep & seed == slct_seed],
  mapping = aes(x = output, y = pred_output, colour = pred_output - output)
) +
  geom_point() +
  geom_abline() +
  facet_wrap(~iter) +
  labs(x = "actual values", y = "fitted values") +
  ggtitle(paste0("Actual vs Fitted for Ad_range Sampling", "\n", "rep:", slct_rep, " and seed:", slct_seed, "\n", "for ", model.type.2))
a_vs_f_Ad_range_seq.2
#### Replications on Each Iteration ####
# Error metrics of the Ad_range runs for model 2 as boxplots, one panel per
# iteration.
Ad_range_seq_ind_bxp.2 <- ggplot(
  data = performance_molten_Ad_range_seq.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_wrap(~iter) +
  # Disabled one-shot reference line:
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad_range Sampling for model_", model.type.2))
Ad_range_seq_ind_bxp.2

# Same metrics laid out on a grid: seeds as rows, iterations as columns.
Ad_range_seq_ind_bxp_seed.2 <- ggplot(
  data = performance_molten_Ad_range_seq.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_grid(seed ~ iter) +
  ggtitle(paste0("sequential test performances by Seeds", "\n", "with Ad_range Sampling for ", model.type.2))
Ad_range_seq_ind_bxp_seed.2
#### Overall BoxPlot ####
# Boxplot of the Ad_range errors of model 2 at iteration 11; the caption
# shows the mean RMSE of the same iteration.
Ad_range_seq_bxp.2 <- ggplot(
  data = performance_molten_Ad_range_seq.2[iter == 11],
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  labs(caption = paste0("final meanRMSE : ", mean(performance_table_Ad_range.2[iter == 11]$rmse))) +
  # Title names the Ad_range strategy (it used to say "Ad Sampling", which made
  # this plot indistinguishable from Ad_seq_bxp.2 when shown side by side).
  ggtitle(paste0("final iteration test performace", "\n", " with Ad_range Sampling for ", model.type.2))
Ad_range_seq_bxp.2
# Five-number summary and outliers of the iteration-11 RMSE values.
boxplot.stats(performance_table_Ad_range.2[iter == 11]$rmse)
#### Train Candidates ####
# Grey points: the initial adaptive data of model 2. Coloured points: the
# Ad_range train candidates of replication 10, coloured by iteration, one
# panel per seed.
tc_Ad_range_seq_plot.2 <- ggplot(adaptive_initial_data.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(colour = "grey") +
  geom_point(data = train_candidates_Ad_range.2[rep == 10], aes(colour = as.factor(iter))) +
  facet_wrap(~seed) +
  # `legend` is not an aesthetic, so labs(legend = ...) never labelled anything;
  # the colour scale is the one that needs a title, and it encodes the iteration.
  labs(colour = "iter") +
  ggtitle(paste0("Train Candidates for each Seed", "\n", "with Ad_range Sampling for ", model.type.2))
tc_Ad_range_seq_plot.2
#### Final Data ####
# Final Ad_range training points of model 2, coloured by the simulation
# output, on a seed (rows) by replication (columns) grid.
final_train_data_Ad_range.2 <- ggplot(data = FinalTrainData_Ad_range.2, aes(x = density, y = `%-similar-wanted`)) +
  geom_point(aes(colour = output)) +
  facet_grid(seed ~ rep) +
  # Alternative layouts: facet_wrap(~rep) or facet_wrap(~seed).
  # `legend` is not an aesthetic; label the colour scale explicitly instead.
  labs(colour = "output") +
  ggtitle(paste0("final Ad_range data for model_ ", model.type.2))
final_train_data_Ad_range.2

# Stack the model 1 and model 2 versions of the Ad_range boxplots.
grid.arrange(Ad_range_seq_ind_bxp.1, Ad_range_seq_ind_bxp.2)
grid.arrange(Ad_range_seq_bxp.1, Ad_range_seq_bxp.2)
# Model 1: Ad against Ad_range, per iteration. Rows of the grid are the two
# sampling strategies, columns are the iterations.
AdS_seq_ind_bxp.1 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_seq.1, sampling = "1Ad"),
    data.table(performance_molten_Ad_range_seq.1, sampling = "2Ad_range")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # Disabled one-shot reference line:
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad Samplings for model_", model.type.1))
AdS_seq_ind_bxp.1
# Final-iteration boxplots of both strategies, side by side.
grid.arrange(Ad_seq_bxp.1, Ad_range_seq_bxp.1, ncol = 2)
# Model 2: Ad against Ad_range, per iteration. Rows of the grid are the two
# sampling strategies, columns are the iterations.
AdS_seq_ind_bxp.2 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_seq.2, sampling = "1Ad"),
    data.table(performance_molten_Ad_range_seq.2, sampling = "2Ad_range")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # Disabled one-shot reference line:
  # geom_hline(data = performance_molten_oneshot, aes(yintercept = errorvalue, group=errortype, col=errortype),stat = "hline", linetype = "dashed") +
  ggtitle(paste0("sequential test performances", "\n", "with Ad Samplings for model_", model.type.2))
AdS_seq_ind_bxp.2
# Final-iteration boxplots of both strategies, side by side.
grid.arrange(Ad_seq_bxp.2, Ad_range_seq_bxp.2, ncol = 2)
# RMSE of the three sequential strategies for model 1. The one-shot RMSE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
rmse_graph_molten_data.1 <- rbind(
  data.table(performance_molten_Ad_seq.1[errortype == "rmse"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.1[errortype == "rmse"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.1[errortype == "rmse"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(rmse_graph_molten_data.1, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # The title follows the model actually plotted instead of a fixed "dummy".
  ggtitle(paste0("RMSE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.1))
# MAE of the three sequential strategies for model 1. The one-shot MAE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
mae_graph_molten_data.1 <- rbind(
  data.table(performance_molten_Ad_seq.1[errortype == "mae"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.1[errortype == "mae"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.1[errortype == "mae"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(mae_graph_molten_data.1, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # The title follows the model actually plotted instead of a fixed "dummy".
  ggtitle(paste0("MAE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.1))
# MAPE of the three sequential strategies for model 1. The one-shot MAPE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
mape_graph_molten_data.1 <- rbind(
  data.table(performance_molten_Ad_seq.1[errortype == "mape"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.1[errortype == "mape"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.1[errortype == "mape"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.1[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(mape_graph_molten_data.1, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # The title follows the model actually plotted instead of a fixed "dummy".
  ggtitle(paste0("MAPE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.1))
# Model 1: iteration-11 errors of Ad_range, Ad and Rd sampling next to the
# one-shot errors, in a single row of four panels. The numeric prefix of
# `sampling` fixes the panel order.
all_range_bxp.1 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_range_seq.1[iter == 11], sampling = "4Ad_range"),
    data.table(performance_molten_Ad_seq.1[iter == 11], sampling = "3Ad"),
    data.table(performance_molten_Rd_seq.1[iter == 11], sampling = "1Rd"),
    data.table(performance_molten_oneshot.1, sampling = "2oneshot")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  facet_wrap(~sampling, ncol = 4) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  ggtitle(paste0("final iteration test performace", "\n", " with 4 samplings for ", model.type.1))
all_range_bxp.1
# RMSE of the three sequential strategies for model 2. The one-shot RMSE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
rmse_graph_molten_data.2 <- rbind(
  data.table(performance_molten_Ad_seq.2[errortype == "rmse"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.2[errortype == "rmse"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.2[errortype == "rmse"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "rmse", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(rmse_graph_molten_data.2, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # Model name comes from model.type.2 so the title stays right if the
  # model assignment at the top of the script is swapped.
  ggtitle(paste0("RMSE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.2))
# MAE of the three sequential strategies for model 2. The one-shot MAE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
mae_graph_molten_data.2 <- rbind(
  data.table(performance_molten_Ad_seq.2[errortype == "mae"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.2[errortype == "mae"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.2[errortype == "mae"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mae", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(mae_graph_molten_data.2, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # Model name comes from model.type.2 so the title stays right if the
  # model assignment at the top of the script is swapped.
  ggtitle(paste0("MAE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.2))
# MAPE of the three sequential strategies for model 2. The one-shot MAPE is
# appended to every strategy as a pseudo-iteration 12 (oneshot = 1) so that it
# shows up as the last column of each facet row.
mape_graph_molten_data.2 <- rbind(
  data.table(performance_molten_Ad_seq.2[errortype == "mape"], sampling = "2Ad", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "2Ad", oneshot = 1),
  data.table(performance_molten_Ad_range_seq.2[errortype == "mape"], sampling = "3Ad_range", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "3Ad_range", oneshot = 1),
  data.table(performance_molten_Rd_seq.2[errortype == "mape"], sampling = "1Rd", oneshot = 0),
  data.table(performance_molten_oneshot.2[errortype == "mape", .(iter = 12, seed, rep, errortype, errorvalue)], sampling = "1Rd", oneshot = 1)
)
ggplot(mape_graph_molten_data.2, aes(y = errorvalue, col = as.factor(oneshot))) +
  geom_boxplot() +
  facet_grid(sampling ~ iter) +
  # Model name comes from model.type.2 so the title stays right if the
  # model assignment at the top of the script is swapped.
  ggtitle(paste0("MAPE plots of Sequential Sampling vs oneshot(iter= 12)", "\n", "for ", model.type.2))
# Model 2: iteration-11 errors of Ad_range, Ad and Rd sampling next to the
# one-shot errors, in a single row of four panels. The numeric prefix of
# `sampling` fixes the panel order.
all_range_bxp.2 <- ggplot(
  data = rbind(
    data.table(performance_molten_Ad_range_seq.2[iter == 11], sampling = "4Ad_range"),
    data.table(performance_molten_Ad_seq.2[iter == 11], sampling = "3Ad"),
    data.table(performance_molten_Rd_seq.2[iter == 11], sampling = "1Rd"),
    data.table(performance_molten_oneshot.2, sampling = "2oneshot")
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  facet_wrap(~sampling, ncol = 4) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  ggtitle(paste0("final iteration test performace", "\n", " with 4 samplings for ", model.type.2))
all_range_bxp.2
#### Metamodel Replications On Final Data Rd ####
# Predictor columns per model: the "basic" model uses two inputs, the "dummy"
# model uses those two plus four extra ones. Model 2 always gets the set that
# model 1 does not.
if (model.type.1 == "dummy") {
  feature_names.1 <- c(
    "density", "%-similar-wanted", "budget-multiplier-dummy",
    "density-multiplier-dummy", "noise-dummy", "tick-limit"
  )
  feature_names.2 <- c("density", "%-similar-wanted")
} else if (model.type.1 == "basic") {
  feature_names.1 <- c("density", "%-similar-wanted")
  feature_names.2 <- c(
    "density", "%-similar-wanted", "budget-multiplier-dummy",
    "density-multiplier-dummy", "noise-dummy", "tick-limit"
  )
}
# Echo the selection.
feature_names.1
feature_names.2
#### Model.1 ####
# Re-fit the random forest on the final Rd training data of model 1.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Rd_rep.1 <- data.table()
performance_table_Rd_rep.1 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.1.retrain <- copy(test_set.1)
for (s in seed.Ad.1) {
  for (k in seq_len(10)) {
    trainx.1 <- FinalTrainData_Rd.1[seed == s & rep == k, .SD, .SDcols = feature_names.1]
    trainy.1 <- FinalTrainData_Rd.1[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.1 <- randomForest(
        x = trainx.1, y = trainy.1, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.1 <- predict(model_Sub.1, test_set.1.retrain)
      predictedLabels.1 <- cbind(
        test_set.1.retrain,
        data.table(pred_output = predictedLabels.1, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Rd_rep.1 <- rbind(predictedLabels_Rd_rep.1, predictedLabels.1)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.1, contains("output")))
      output_variables_1 <- predictedLabels.1[[output_variables[1]]]
      output_variables_2 <- predictedLabels.1[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position; the
      # previous positional (s, r, k) row stored r under `rep` and k under
      # `new_rep`, contradicting the predicted-labels table.
      performance_table_Rd_rep.1 <- rbind(
        performance_table_Rd_rep.1,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.1, trainy.1)
  }
}
# Long format of the Rd re-fit errors of model 1: one row per
# (seed, rep, new_rep, error metric).
performance_molten_Rd.1 <- melt(
  data = performance_table_Rd_rep.1,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Rd.1,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# All error metrics, one panel per seed.
Rd_ind_bxp.1 <- ggplot(
  data = performance_molten_Rd.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Rd_WRep Sampling for model_", model.type.1))
Rd_ind_bxp.1

# Base-graphics boxplot of the RMSE per seed; printing the result shows the
# statistics returned by boxplot().
bxp.Rd.1 <- boxplot(errorvalue ~ seed, data = performance_molten_Rd.1[errortype == "rmse"], col = "pink")
bxp.Rd.1
# Seeds ranked by mean RMSE, best first.
performance_table_Rd_rep.1[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled; the caption reports the overall mean RMSE.
Rd_bxp.1 <- ggplot(
  data = performance_molten_Rd.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Rd_rep.1[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Rd_WRep Sampling for ", model.type.1))
Rd_bxp.1
boxplot.stats(performance_table_Rd_rep.1[["rmse"]])
#### Model.2 ####
# Re-fit the random forest on the final Rd training data of model 2.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Rd_rep.2 <- data.table()
performance_table_Rd_rep.2 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.2.retrain <- copy(test_set.2)
for (s in seed.Ad.2) {
  for (k in seq_len(10)) {
    trainx.2 <- FinalTrainData_Rd.2[seed == s & rep == k, .SD, .SDcols = feature_names.2]
    trainy.2 <- FinalTrainData_Rd.2[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.2 <- randomForest(
        x = trainx.2, y = trainy.2, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.2 <- predict(model_Sub.2, test_set.2.retrain)
      # rep must be k here: the previous `rep = r, new_rep = r` dropped the
      # data replication, so rows from different k were indistinguishable.
      predictedLabels.2 <- cbind(
        test_set.2.retrain,
        data.table(pred_output = predictedLabels.2, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Rd_rep.2 <- rbind(predictedLabels_Rd_rep.2, predictedLabels.2)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.2, contains("output")))
      output_variables_1 <- predictedLabels.2[[output_variables[1]]]
      output_variables_2 <- predictedLabels.2[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position (the
      # previous positional (s, r, k) row stored them the wrong way round).
      performance_table_Rd_rep.2 <- rbind(
        performance_table_Rd_rep.2,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.2, trainy.2)
  }
}
# Long format of the Rd re-fit errors of model 2: one row per
# (seed, rep, new_rep, error metric).
performance_molten_Rd.2 <- melt(
  data = performance_table_Rd_rep.2,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Rd.2,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# All error metrics, one panel per seed.
Rd_ind_bxp.2 <- ggplot(
  data = performance_molten_Rd.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Rd_WRep Sampling for model_", model.type.2))
Rd_ind_bxp.2

# Base-graphics boxplot of the RMSE per seed; printing the result shows the
# statistics returned by boxplot().
bxp.Rd.2 <- boxplot(errorvalue ~ seed, data = performance_molten_Rd.2[errortype == "rmse"], col = "pink")
bxp.Rd.2
# Seeds ranked by mean RMSE, best first.
performance_table_Rd_rep.2[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled; the caption reports the overall mean RMSE.
Rd_bxp.2 <- ggplot(
  data = performance_molten_Rd.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Rd_rep.2[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Rd_WRep Sampling for ", model.type.2))
Rd_bxp.2
boxplot.stats(performance_table_Rd_rep.2[["rmse"]])
#### Model.1 ####
# Re-fit the random forest on the final Ad training data of model 1.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Ad_rep.1 <- data.table()
performance_table_Ad_rep.1 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.1.retrain <- copy(test_set.1)
for (s in seed.Ad.1) {
  for (k in seq_len(10)) {
    trainx.1 <- FinalTrainData_Ad.1[seed == s & rep == k, .SD, .SDcols = feature_names.1]
    trainy.1 <- FinalTrainData_Ad.1[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.1 <- randomForest(
        x = trainx.1, y = trainy.1, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.1 <- predict(model_Sub.1, test_set.1.retrain)
      # rep/new_rep were swapped here (rep = r, new_rep = k); rep now matches
      # the `rep == k` filter used to select the training data.
      predictedLabels.1 <- cbind(
        test_set.1.retrain,
        data.table(pred_output = predictedLabels.1, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Ad_rep.1 <- rbind(predictedLabels_Ad_rep.1, predictedLabels.1)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.1, contains("output")))
      output_variables_1 <- predictedLabels.1[[output_variables[1]]]
      output_variables_2 <- predictedLabels.1[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position.
      performance_table_Ad_rep.1 <- rbind(
        performance_table_Ad_rep.1,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.1, trainy.1)
  }
}
# Long format of the Ad re-fit errors of model 1: one row per
# (seed, rep, new_rep, error metric).
performance_molten_Ad.1 <- melt(
  data = performance_table_Ad_rep.1,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Ad.1,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# All error metrics, one panel per seed.
Ad_ind_bxp.1 <- ggplot(
  data = performance_molten_Ad.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Ad_WRep Sampling for model_", model.type.1))
Ad_ind_bxp.1

# Base-graphics boxplot of the RMSE per seed; printing the result shows the
# statistics returned by boxplot().
bxp.Ad.1 <- boxplot(errorvalue ~ seed, data = performance_molten_Ad.1[errortype == "rmse"], col = "pink")
bxp.Ad.1
# Seeds ranked by mean RMSE, best first.
performance_table_Ad_rep.1[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled; the caption reports the overall mean RMSE.
Ad_bxp.1 <- ggplot(
  data = performance_molten_Ad.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Ad_rep.1[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Ad_WRep Sampling for ", model.type.1))
Ad_bxp.1
boxplot.stats(performance_table_Ad_rep.1[["rmse"]])
#### Model.2 ####
# Re-fit the random forest on the final Ad training data of model 2.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Ad_rep.2 <- data.table()
performance_table_Ad_rep.2 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.2.retrain <- copy(test_set.2)
for (s in seed.Ad.2) {
  for (k in seq_len(10)) {
    trainx.2 <- FinalTrainData_Ad.2[seed == s & rep == k, .SD, .SDcols = feature_names.2]
    trainy.2 <- FinalTrainData_Ad.2[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.2 <- randomForest(
        x = trainx.2, y = trainy.2, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.2 <- predict(model_Sub.2, test_set.2.retrain)
      # rep/new_rep were swapped here (rep = r, new_rep = k); rep now matches
      # the `rep == k` filter used to select the training data.
      predictedLabels.2 <- cbind(
        test_set.2.retrain,
        data.table(pred_output = predictedLabels.2, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Ad_rep.2 <- rbind(predictedLabels_Ad_rep.2, predictedLabels.2)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.2, contains("output")))
      output_variables_1 <- predictedLabels.2[[output_variables[1]]]
      output_variables_2 <- predictedLabels.2[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position.
      performance_table_Ad_rep.2 <- rbind(
        performance_table_Ad_rep.2,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.2, trainy.2)
  }
}
# Long format of the Ad re-fit errors of model 2: one row per
# (seed, rep, new_rep, error metric).
performance_molten_Ad.2 <- melt(
  data = performance_table_Ad_rep.2,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Ad.2,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# All error metrics, one panel per seed.
Ad_ind_bxp.2 <- ggplot(
  data = performance_molten_Ad.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Ad_WRep Sampling for model_", model.type.2))
Ad_ind_bxp.2

# Base-graphics boxplot of the RMSE per seed; printing the result shows the
# statistics returned by boxplot().
bxp.Ad.2 <- boxplot(errorvalue ~ seed, data = performance_molten_Ad.2[errortype == "rmse"], col = "pink")
bxp.Ad.2
# Seeds ranked by mean RMSE, best first.
performance_table_Ad_rep.2[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled; the caption reports the overall mean RMSE.
Ad_bxp.2 <- ggplot(
  data = performance_molten_Ad.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Ad_rep.2[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Ad_WRep Sampling for ", model.type.2))
Ad_bxp.2
boxplot.stats(performance_table_Ad_rep.2[["rmse"]])
#### Model.1 ####
# Re-fit the random forest on the final Ad_range training data of model 1.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Ad_range_rep.1 <- data.table()
performance_table_Ad_range_rep.1 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.1.retrain <- copy(test_set.1)
for (s in seed.Ad.1) {
  for (k in seq_len(10)) {
    trainx.1 <- FinalTrainData_Ad_range.1[seed == s & rep == k, .SD, .SDcols = feature_names.1]
    trainy.1 <- FinalTrainData_Ad_range.1[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.1 <- randomForest(
        x = trainx.1, y = trainy.1, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.1 <- predict(model_Sub.1, test_set.1.retrain)
      # rep/new_rep were swapped here (rep = r, new_rep = k); rep now matches
      # the `rep == k` filter used to select the training data.
      predictedLabels.1 <- cbind(
        test_set.1.retrain,
        data.table(pred_output = predictedLabels.1, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Ad_range_rep.1 <- rbind(predictedLabels_Ad_range_rep.1, predictedLabels.1)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.1, contains("output")))
      output_variables_1 <- predictedLabels.1[[output_variables[1]]]
      output_variables_2 <- predictedLabels.1[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position.
      performance_table_Ad_range_rep.1 <- rbind(
        performance_table_Ad_range_rep.1,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.1, trainy.1)
  }
}
# Long format of the Ad_range re-fit errors of model 1: one row per
# (seed, rep, new_rep, error metric).
performance_molten_Ad_range.1 <- melt(
  data = performance_table_Ad_range_rep.1,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Ad_range.1,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# All error metrics, one panel per seed.
Ad_range_ind_bxp.1 <- ggplot(
  data = performance_molten_Ad_range.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Ad_range_WRep Sampling for model_", model.type.1))
Ad_range_ind_bxp.1

# Base-graphics boxplot of the RMSE per seed; printing the result shows the
# statistics returned by boxplot().
bxp.Ad_range.1 <- boxplot(errorvalue ~ seed, data = performance_molten_Ad_range.1[errortype == "rmse"], col = "pink")
bxp.Ad_range.1
# Seeds ranked by mean RMSE, best first.
performance_table_Ad_range_rep.1[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled; the caption reports the overall mean RMSE.
Ad_range_bxp.1 <- ggplot(
  data = performance_molten_Ad_range.1,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(mapping = aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Ad_range_rep.1[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Ad_range_WRep Sampling for ", model.type.1))
Ad_range_bxp.1
boxplot.stats(performance_table_Ad_range_rep.1[["rmse"]])
#### Model.2 ####
# Re-fit the random forest on the final Ad_range training data of model 2.
# For every seed `s` and stored replication `k` the forest is trained 10 times
# (`r`) on identical data and scored on the test set.
#   rep     = k, the replication the training data belongs to
#   new_rep = r, the index of the re-fit
mtry <- 2
ntree <- 300
predictedLabels_Ad_range_rep.2 <- data.table()
performance_table_Ad_range_rep.2 <- data.table(
  seed = numeric(), rep = numeric(), new_rep = numeric(),
  mae = numeric(), rmse = numeric(), mape = numeric()
)
# The test set is the same for every fit, so one copy is enough.
test_set.2.retrain <- copy(test_set.2)
for (s in seed.Ad.2) {
  for (k in seq_len(10)) {
    trainx.2 <- FinalTrainData_Ad_range.2[seed == s & rep == k, .SD, .SDcols = feature_names.2]
    trainy.2 <- FinalTrainData_Ad_range.2[seed == s & rep == k]$output
    for (r in seq_len(10)) {
      model_Sub.2 <- randomForest(
        x = trainx.2, y = trainy.2, importance = TRUE,
        ntree = ntree, mtry = mtry
      )
      predictedLabels.2 <- predict(model_Sub.2, test_set.2.retrain)
      # rep/new_rep were swapped here (rep = r, new_rep = k); rep now matches
      # the `rep == k` filter used to select the training data.
      predictedLabels.2 <- cbind(
        test_set.2.retrain,
        data.table(pred_output = predictedLabels.2, seed = s, rep = k, new_rep = r)
      )
      predictedLabels_Ad_range_rep.2 <- rbind(predictedLabels_Ad_range_rep.2, predictedLabels.2)

      # Columns whose name contains "output": the first is taken as the actual
      # value, the second as the prediction just added.
      output_variables <- colnames(select(predictedLabels.2, contains("output")))
      output_variables_1 <- predictedLabels.2[[output_variables[1]]]
      output_variables_2 <- predictedLabels.2[[output_variables[2]]]
      performance_temp <- matrix(c(1:3), nrow = 1, ncol = 3)
      performance_temp[1] <- mae(output_variables_1, output_variables_2)
      performance_temp[2] <- rmse(output_variables_1, output_variables_2)
      performance_temp[3] <- mape(output_variables_1, output_variables_2)
      # Bind by name so that rep/new_rep cannot be swapped by position.
      performance_table_Ad_range_rep.2 <- rbind(
        performance_table_Ad_range_rep.2,
        data.table(
          seed = s, rep = k, new_rep = r,
          mae = performance_temp[1], rmse = performance_temp[2], mape = performance_temp[3]
        )
      )
    }
    rm(trainx.2, trainy.2)
  }
}
# Reshape the per-run error table of model 2 to long format: one row per
# (seed, rep, new_rep, error metric), with the metric name in `errortype`
# and its value in `errorvalue`.
performance_molten_Ad_range.2 <- melt(
  performance_table_Ad_range_rep.2,
  id.vars = c("seed", "rep", "new_rep")
)
setnames(
  performance_molten_Ad_range.2,
  old = c("variable", "value"),
  new = c("errortype", "errorvalue")
)

#### Replications on Each Seed ####
# One panel per seed, one box per error metric.
Ad_range_ind_bxp.2 <- ggplot(
  data = performance_molten_Ad_range.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_wrap(~ seed, ncol = 3) +
  ggtitle(paste0(" test performances", "\n", "with Ad_range_WRep Sampling for model_", model.type.2))
Ad_range_ind_bxp.2

# Base-graphics boxplot of RMSE only, split by seed; the returned statistics
# are kept and printed.
bxp.Ad_range.2 <- boxplot(
  errorvalue ~ seed,
  data = performance_molten_Ad_range.2[errortype == "rmse"],
  col = "pink"
)
bxp.Ad_range.2

# Seeds ranked by their mean RMSE (ascending).
performance_table_Ad_range_rep.2[, .(mean_rmse = mean(rmse)), by = .(seed)][order(mean_rmse)]

# All seeds pooled: one box per error metric, overall mean RMSE in the caption.
Ad_range_bxp.2 <- ggplot(
  data = performance_molten_Ad_range.2,
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(aes(colour = errortype)) +
  # facet_wrap(~ seed) +
  labs(caption = paste0("meanRMSE : ", mean(performance_table_Ad_range_rep.2[["rmse"]]))) +
  ggtitle(paste0("overall test performace", "\n", " with Ad_range_WRep Sampling for ", model.type.2))
Ad_range_bxp.2

# Five-number summary and outliers of the pooled RMSE values.
boxplot.stats(performance_table_Ad_range_rep.2[["rmse"]])
Results shown here include:
# Quick look at the long-format error table of the Rd sampling runs (model 1).
head(performance_molten_Rd.1)

# Side-by-side comparison of the four sampling strategies for model 1.
# Each strategy contributes its (errortype, errorvalue) pairs tagged with a
# `sampling` label; the numeric prefix of the label fixes the panel order.
all_rep_ind_bxp.1 <- ggplot(
  data = rbindlist(
    list(
      data.table(performance_molten_Ad_range.1[, .(errortype, errorvalue)], sampling = "4Ad_range"),
      data.table(performance_molten_Ad.1[, .(errortype, errorvalue)], sampling = "3Ad"),
      data.table(performance_molten_Rd.1[, .(errortype, errorvalue)], sampling = "1Rd"),
      data.table(performance_molten_oneshot.1[, .(errortype, errorvalue)], sampling = "2oneshot")
    ),
    use.names = TRUE
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_grid(~ sampling) +
  ggtitle(paste0(" test performances after replication", "\n", "with 4 Samplings for model_", model.type.1))
all_rep_ind_bxp.1

# Stack the previously built all_range_bxp.1 plot above the comparison plot.
grid.arrange(all_range_bxp.1, all_rep_ind_bxp.1)
# Side-by-side comparison of the four sampling strategies for model 2.
# Each strategy contributes its (errortype, errorvalue) pairs tagged with a
# `sampling` label; the numeric prefix of the label fixes the panel order.
all_rep_ind_bxp.2 <- ggplot(
  data = rbindlist(
    list(
      data.table(performance_molten_Ad_range.2[, .(errortype, errorvalue)], sampling = "4Ad_range"),
      data.table(performance_molten_Ad.2[, .(errortype, errorvalue)], sampling = "3Ad"),
      data.table(performance_molten_Rd.2[, .(errortype, errorvalue)], sampling = "1Rd"),
      data.table(performance_molten_oneshot.2[, .(errortype, errorvalue)], sampling = "2oneshot")
    ),
    use.names = TRUE
  ),
  mapping = aes(y = errorvalue, group = errortype, colour = errortype)
) +
  geom_boxplot(varwidth = TRUE) +
  facet_grid(~ sampling) +
  ggtitle(paste0(" test performances after replication", "\n", "with 4 Samplings for model_", model.type.2))
all_rep_ind_bxp.2

# Stack the previously built all_range_bxp.2 plot above the comparison plot.
grid.arrange(all_range_bxp.2, all_rep_ind_bxp.2)